### Replication file for 
### "Does Chinese FDI in Africa Inspire for a China Model of Development"

### Part 2: Main results
rm(list=ls())

### Use "Alt+O" to collapse all sections and "Shift+Alt+O" to expand all

### Set your own working directory

# Load required packages --------------------------------------------------

### packages
library(tidyverse)
library(miceadds)
library(car)
library(xtable)
library(ggplot2)
library(logr) # to export the log file (optional)
library(flextable) # to export tables to Word (optional)

### Suppress output with the format of scientific number
options(scipen=999)

# Customize theme of plots ------------------------------------------------
my_theme<-theme_bw()+
  theme(panel.grid = element_blank(),
        plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"),
        axis.text.y = element_text(margin = margin(t = 0, r = 5, b = 5, l = 5),
                                   size=14,color="black"),
        axis.text.x = element_text(margin = margin(t = 0, r = 0, b = 5, l = 0),
                                   size=14,color="black",vjust = -0.5),
        axis.ticks = element_line(color="black",size=1),
        axis.ticks.length = unit(0.1, "cm"),
        axis.title.x = 
          element_text(size=18,margin = margin(t = 10, r = 0, b = 10, l = 0)),
        axis.title.y = 
          element_text(size=18,margin = margin(t = 0, r = 0, b = 5, l = 0)),
        strip.text = element_text(colour = 'black',size=18)
        )

# Load required dataset ---------------------------------------------------

### load the main dataset for analysis
load("data_respno.RData")

# Subsample by sector: manufacturing -------------------------------------
data_respno_manuf<-list()
for (j in 1:length(data_respno)){
  
  # Close to active or eventual manufacturing projects
  data_respno_manuf[[j]]<-data_respno[[j]] %>% 
    filter(((D_close_active_fdi==1 & Num_fdi_manuf_active>0)|
              (D_close_eventual_fdi==1 & Num_fdi_manuf_eventual>0)))
  
  # Countries included in the subsample
  country_select<-data_respno_manuf[[j]] %>% pull(countryname) %>% unique()
  
  # Respondents not close to any projects
  data_notclose<-data_respno[[j]] %>% filter(countryname %in% country_select,
                                             D_close_fdi==0)
  
  # Row bind respondents close to active or eventual and not close
  data_respno_manuf[[j]]<-rbind(data_respno_manuf[[j]],data_notclose)
  
}

# Subsample by sector: service --------------------------------------------
data_respno_service<-list()
for (j in 1:length(data_respno)){
  
  # Close to active or eventual service projects
  data_respno_service[[j]]<-data_respno[[j]] %>% 
    filter(((D_close_active_fdi==1 & Num_fdi_service_active>0)|
              (D_close_eventual_fdi==1 & Num_fdi_service_eventual>0)))
  
  # Countries included in the subsample
  country_select<-data_respno_service[[j]] %>% pull(countryname) %>% unique()
  
  # Respondents not close to any projects
  data_notclose<-data_respno[[j]] %>% filter(countryname %in% country_select,
                                             D_close_fdi==0)
  
  # Row bind respondents close to active or eventual and not close
  data_respno_service[[j]]<-rbind(data_respno_service[[j]],data_notclose)
  
}

# Subsample by sector: resource -------------------------------------------
data_respno_resource<-list()
for (j in 1:length(data_respno)){
  
  # Close to active or eventual resource projects
  data_respno_resource[[j]]<-data_respno[[j]] %>% 
    filter(((D_close_active_fdi==1 & Num_fdi_resource_active>0)|
              (D_close_eventual_fdi==1 & Num_fdi_resource_eventual>0)))
  
  # Countries included in the subsample
  country_select<-data_respno_resource[[j]] %>% pull(countryname) %>% unique()
  
  # Respondents not close to any projects
  data_notclose<-data_respno[[j]] %>% filter(countryname %in% country_select,
                                             D_close_fdi==0)
  
  # Row bind respondents close to active or eventual and not close
  data_respno_resource[[j]]<-rbind(data_respno_resource[[j]],data_notclose)
  
}

# A function that estimate the effects and output tables ------------------
func_model<-function(d,dv){
  
  m<-lm.cluster(data=d,
                # cluster the standard error in the survey cluster
                cluster=d$uniqueea,
                # dependent variables and status to Chinese FDI
                d[,dv]~D_close_active_fdi+D_close_eventual_fdi
                # control variables
                +as.factor(Urban)+Age+I(Age^2)+as.factor(Gender)+
                  as.factor(Education)
                # country fixed effects
                +as.factor(countryname)
  )
  
  # Coefficients of interests (Active and Eventual)
  coefs<-unname(coef(m)[2:3])
  
  # t values
  tvalue<-unname(summary(m)[2:3,3])
  
  # Active-Eventual
  diff<-coefs[1]-coefs[2]
  test_diff<-linearHypothesis(m,"D_close_active_fdi=D_close_eventual_fdi")
  F_diff<-test_diff$Chisq[2]
  p_diff<-test_diff$`Pr(>Chisq)`[2]
  
  # Number of countries
  N_ctry<-d %>% 
    pull(countryname) %>% 
    unique() %>% 
    length()
  
  # Number of enumeration areas
  N_ea<-d %>% 
    subset(select=c(dv,"D_close_active_fdi","D_close_eventual_fdi",
                    "Urban","Age","Gender","Education","uniqueea")) %>% 
    na.omit() %>% 
    pull(uniqueea) %>% 
    unique() %>% 
    length()
  
  # Number of observations
  N_obs<-d %>% 
    subset(select=c(dv,"D_close_active_fdi","D_close_eventual_fdi",
                    "Urban","Age","Gender","Education")) %>% 
    na.omit() %>% 
    nrow()
  
  # Combine the results
  out<-c(coefs[1],tvalue[1],coefs[2],tvalue[2],diff,F_diff,p_diff,NA,NA,
         N_ctry,N_ea,N_obs,summary(m$lm_res)$adj.r.squared)
  print(out)
  
} 

# Descriptive statistics: Table 1 -----------------------------------------

### Status of active and eventual (25km)
descriptive_25<-data_respno[[1]] %>% 
  subset(select=c(D_close_active_fdi,D_close_eventual_fdi)) %>% 
  rename(Active_25=D_close_active_fdi,Eventual_25=D_close_eventual_fdi)

### Status of active and eventual (50km)
descriptive_50<-data_respno[[2]] %>% 
  subset(select=c(D_close_active_fdi,D_close_eventual_fdi)) %>% 
  rename(Active_50=D_close_active_fdi,Eventual_50=D_close_eventual_fdi)

### Other variables
descriptive_other<-data_respno[[2]] %>% 
  subset(select=c(BestModel_China,Positive_D,Positive_O,
                  Business_Positive,Infrastructure_Positive,
                  ProductCost_Positive,People_Positive,
                  Extract_Negative,Land_Negative,JobBusiness_Negative,
                  ProductQuality_Negative,People_Negative,
                  Urban,Age,Gender,Education))

### Combine the variables to make the descriptive table
descriptive_table<-cbind(descriptive_25,descriptive_50,descriptive_other)

### Manually print the descriptive table: Table 1
N<-apply(descriptive_table,2,function(x) sum(!is.na(x)))
Mean<-apply(descriptive_table,2,function(x) mean(x,na.rm=T))
StDev<-apply(descriptive_table,2,function(x) sd(x,na.rm=T))
Min<-apply(descriptive_table,2,function(x) min(x,na.rm=T))
Pctl_25<-apply(descriptive_table,2,function(x) quantile(x,0.25,na.rm=T))
Pctl_75<-apply(descriptive_table,2,function(x) quantile(x,0.75,na.rm=T))
Max<-apply(descriptive_table,2,function(x) max(x,na.rm=T))
Variables<-c("Close to active Chinese FDI (25km)",
             "Close to eventual Chinese FDI (25km)",
             "Close to active Chinese FDI (50km)",
             "Close to eventual Chinese FDI (50km)",
             "China as the best model of development",
             "China's image (dummy)","China's image (ordinal)",
             "China's business investment",
             "Investment in infrastructure/development",
             "The cost of Chinese products","Chinese people and culture",
             "China's extraction of resources","Land grabbing",
             "Taking jobs or business away","The quality of Chinese products",
             "Chinese people's behavior",
             "Urban","Age","Gender","Education")

Table_1<-cbind.data.frame(Variables,N,Mean,StDev,Min,Pctl_25,Pctl_75,Max)
Table_1[,-c(1,2)]<-apply(Table_1[,-c(1,2)],2,
                         function(x) format(round(x,3),nsmall=3))
rownames(Table_1)<-NULL

### Print Table 1
Table_1

### Print the log file for Table 1
# log_open("C:/Users/xwang21/Dropbox/Research/China Model Africa/WD_R&R/Replication File WD/Table_1.log")
# log_print(Table_1)
# log_close()

### Export Table 1 as a Word document
# Table_1_doc<-flextable(Table_1)
# save_as_docx(Table_1_doc,path = "Your path")


# Main results: Table 2 ---------------------------------------------------

### Dependent variables
dv<-c("BestModel_China","Positive_O","Positive_D")

### Use datasets with 25km and 50km distance buffers
num_distance<-2

### Estimate and output the main results
out_main<-rep(list(rep(list(0),length(dv))),num_distance)
for (j in 1:num_distance){
  
  for (k in 1:length(dv)){
      
      out_main[[j]][[k]]<-func_model(data_respno[[j]],dv[k])
      
  }
}

### A function to format regression tables
func_table<-function(out_list){
  
  out_table<-do.call(cbind,out_list)
  
  out_table[-c(10:12),]<-format(round(out_table[-c(10:12),],3),nsmall=3)
  out_table<-trimws(out_table)
  out_table[8:9,]<-"Yes"
  out_table[c(2,4),]<-paste("(",out_table[c(2,4),],")",sep="")
  
  names_iv<-c("Active","","Eventual","","Active-Eventual",
              "F test: Active-Eventual=0","p value",
              "Individual Controls","Country fixed effects",
              "Number of countries","Number of enumeration areas",
              "Number of observations","Adjusted R squared")
  
  out_table<-cbind(names_iv,out_table)
  
}

### Print Table 2
Table_2<-func_table(c(out_main[[1]],out_main[[2]]))
colnames(Table_2)<-c("","(1)","(2)","(3)","(4)","(5)","(6)")
Table_2

### Print the log file for Table 2
# log_open("C:/Users/xwang21/Dropbox/Research/China Model Africa/WD_R&R/Replication File WD/Table_2.log")
# log_print(Table_2)
# log_close()

### Export Table 2 as a Word document
# Table_2_doc<-flextable(as.data.frame(Table_2))
# save_as_docx(Table_2_doc,path="Your path")

# Plot main effects by distance to Chinese FDI: Figure 2 ------------------

### Estimate effects and standard errors for each distance cutoff
coef_diff<-se_diff<-vector()
for (j in 1:length(data_respno)){
  
  m<-lm.cluster(data=data_respno[[j]],
                #cluster the standard error in the survey cluster
                cluster=data_respno[[j]]$uniqueea,
                # dv and key iv
                BestModel_China~D_close_active_fdi+D_close_eventual_fdi
                # control variables
                +as.factor(Urban)+Age+I(Age^2)+as.factor(Gender)+
                  as.factor(Education)
                # country fixed effects
                +as.factor(countryname)
  )
  
  coef_diff[j]<-coef(m)[2]-coef(m)[3]
  se_diff[j]<-sqrt(vcov(m)[2,2]+vcov(m)[3,3]-2*vcov(m)[2,3])
  
}
Effects_distance<-bind_cols(Cut=c("25km","50km","75km","100km","125km",
                                  "150km","175km","200km"),
                            Estimate=coef_diff,
                            SE=se_diff)

### Plot Figure 2
windows(width = 600,height = 350)
Effects_distance %>% 
  mutate(Cut=factor(Cut,
                    levels=c("25km","50km","75km","100km","125km",
                             "150km","175km","200km"))) %>% 
  ggplot()+
  geom_pointrange(aes(x=Cut,
                      y=Estimate,
                      ymin=Estimate-1.96*SE,
                      ymax=Estimate+1.96*SE))+
  geom_hline(yintercept = 0,lty=2)+
  labs(x="Distance cut-offs",y="Active - Eventual")+
  scale_y_continuous(limits=c(-0.075,0.025))+
  my_theme

### Save Figure 2 as Tiff
# ggsave("Figure_2.tiff")

# Plot positive factors by sector: Figure 3 -------------------------------

### Sector, subsample by sector (50km), and dependent variables (positive)
name_sector<-c("Manufacturing","Resources","Service")
data_sector<-list(data_respno_manuf[[2]],data_respno_resource[[2]],
                  data_respno_service[[2]])
dv_positive<-c("People_Positive","ProductCost_Positive",
               "Infrastructure_Positive","Business_Positive")
name_dv<-c("Chinese people and culture","The cost of Chinese products",
           "China's investment in \n infrastructure or other development",
           "China's business investment")

### Estimate effects and standard errors for each subsample
coef_diff<-se_diff<-rep(list(rep(list(0),length(dv_positive))),
                        length(data_sector))
for (d in 1:length(data_sector)){
  
  for (k in 1:length(dv_positive)){
    
    m<-lm.cluster(data=data_sector[[d]],
                  #cluster the standard error in the survey cluster
                  cluster=data_sector[[d]]$uniqueea,
                  # dv and key iv
                  data_sector[[d]][,dv_positive[k]]~D_close_active_fdi+
                    D_close_eventual_fdi
                  # control variables
                  +as.factor(Urban)+Age+I(Age^2)+as.factor(Gender)+
                    as.factor(Education)
                  # country fixed effects
                  +as.factor(countryname)
    )
    
    coef_diff[[d]][[k]]<-bind_cols(sector=name_sector[d],
                                   dv_var=name_dv[k],
                                   Estimate=coef(m)[2]-coef(m)[3])
    se_diff[[d]][[k]]<-bind_cols(sector=name_sector[d],
                                 dv_var=name_dv[k],
                                 SE=sqrt(vcov(m)[2,2]+vcov(m)[3,3]-
                                           2*vcov(m)[2,3]))
  }
  
}
coef_diff<-bind_rows(coef_diff)
se_diff<-bind_rows(se_diff)
Sector_positive<-merge(coef_diff,se_diff,by=c("sector","dv_var"),all.x=T)

### Plot Figure 3
windows(width=600,height=350)
Sector_positive %>% 
  mutate(dv_var=factor(dv_var,levels=name_dv),
         sector=factor(sector,levels=name_sector)) %>% 
  ggplot()+
  geom_pointrange(aes(x=dv_var,
                      y=Estimate,
                      ymin=Estimate-1.96*SE,
                      ymax=Estimate+1.96*SE))+
  geom_hline(yintercept = 0,lty=2)+
  coord_flip()+
  labs(x="",y="Active - Eventual")+
  my_theme+
  facet_wrap(~sector)

### Save Figure 3
# ggsave("Figure_3.tiff")

# Plot negative factors by sector: Figure 4 -------------------------------

### Sector, subsample by sector (50km), and dependent variables (negative)
dv_negative<-c("People_Negative","ProductQuality_Negative","Extract_Negative",
               "Land_Negative","JobBusiness_Negative")
name_dv<-c("Chinese people's behavior","The quality of Chinese products",
           "China's extraction of resources","Land grabbing",
           "Taking jobs or business away")

### Estimate effects and standard errors
coef_diff<-se_diff<-rep(list(rep(list(0),length(dv_negative))),
                        length(data_sector))
for (d in 1:length(data_sector)){
  
  for (k in 1:length(dv_negative)){
    
    m<-lm.cluster(data=data_sector[[d]],
                  #cluster the standard error in the survey cluster
                  cluster=data_sector[[d]]$uniqueea,
                  # dv and key iv
                  data_sector[[d]][,dv_negative[k]]~D_close_active_fdi+
                    D_close_eventual_fdi
                  # control variables
                  +as.factor(Urban)+Age+I(Age^2)+as.factor(Gender)+
                    as.factor(Education)
                  # country fixed effects
                  +as.factor(countryname)
    )
    
    coef_diff[[d]][[k]]<-bind_cols(sector=name_sector[d],
                                   dv_var=name_dv[k],
                                   Estimate=coef(m)[2]-coef(m)[3])
    se_diff[[d]][[k]]<-bind_cols(sector=name_sector[d],
                                 dv_var=name_dv[k],
                                 SE=sqrt(vcov(m)[2,2]+vcov(m)[3,3]-
                                                 2*vcov(m)[2,3]))
  }
  
}
coef_diff<-bind_rows(coef_diff)
se_diff<-bind_rows(se_diff)
Sector_negative<-merge(coef_diff,se_diff,by=c("sector","dv_var"),all.x=T)

### Plot Figure 4
windows(width=600,height=350)
Sector_negative %>% 
  mutate(dv_var=factor(dv_var,levels=name_dv),
         sector=factor(sector,levels=name_sector)) %>% 
  ggplot()+
  geom_pointrange(aes(x=dv_var,
                      y=Estimate,
                      ymin=Estimate-1.96*SE,
                      ymax=Estimate+1.96*SE))+
  geom_hline(yintercept = 0,lty=2)+
  coord_flip()+
  labs(x="",y="Active - Eventual")+
  my_theme+
  facet_wrap(~sector)

### Save Figure 4
# ggsave("Figure_4.tiff")

# Plot randomization test: Figure 5 ---------------------------------------

### Set seed
set.seed(1221)

### Number of draws
draws<-5000

### An empty list to store placebo effects
coef_diff_placebo<-NA

### Generate placebo effects (it will take some time)
for (i in 1:draws){
  
  # ID of respondents close to active or eventual projects
  obs_close<-data_respno[[2]] %>% 
    filter(D_close_fdi==1) %>% 
    pull(respno)
  
  # Number of respondents close to active projects
  N_active<-data_respno[[2]] %>% 
    filter(D_close_active_fdi==1) %>% 
    nrow()
  
  # Randomly sample "N_active" respondents from those active or eventual
  obs_active<-sample(obs_close,N_active)
  obs_eventual<-obs_close[!obs_close %in% obs_active]
  
  # Recode active and eventual in the placebo dataset
  d_placebo<-data_respno[[2]] %>% 
    mutate(D_close_active_fdi_placebo=ifelse(respno %in% obs_active,1,0),
           D_close_eventual_fdi_placebo=ifelse(respno %in% obs_eventual,1,0))
  
  # Estiamte the model using placebo data
  m<-lm.cluster(data=d_placebo,
                #cluster the standard error in the survey cluster
                cluster=d_placebo$uniqueea,
                # dv and key iv
                BestModel_China~D_close_active_fdi_placebo+
                  D_close_eventual_fdi_placebo
                # control variables
                +as.factor(Urban)+Age+I(Age^2)+as.factor(Gender)+
                  as.factor(Education)
                # country fixed effects
                +as.factor(countryname)
  )
  
  coef_diff_placebo[i]<-coef(m)[2]-coef(m)[3]
  
  print(i)
  
}
coef_diff_placebo<-as.data.frame(coef_diff_placebo)
colnames(coef_diff_placebo)<-"placebo"
# save(coef_diff_placebo,file="coef_diff_placebo.RData")

### Plot Figure 5
windows(width = 600,height = 350)
ggplot(coef_diff_placebo)+
  geom_histogram(aes(x=placebo),bins = 30,fill="gray80",color="black")+
  geom_vline(xintercept=-0.045,lwd=1)+
  scale_x_continuous(limits=c(-0.05,0.05),expand = c(0,0),breaks=c(-0.045,-0.030,0,0.030))+
  scale_y_continuous(limits=c(0,900),expand=c(0,0))+
  annotate("text", x=-0.035, y=400, label= "Estimated effect",size=6.5)+
  labs(x="Distribution of placebo effects",y="")+
  my_theme

### Save Figure 5
# ggsave("Figure_5.tiff")
